In [ ]:
import os
import rasterio
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score
from sklearn.model_selection import StratifiedKFold
In [ ]:
# Set folder and read TIF files
folder = "data_m/sentinel-2" # Corrected typo in path
tifs = sorted([os.path.join(folder, f) for f in os.listdir(folder) if f.endswith("tif")])
b8, *stack = tifs
In [ ]:
b8
Out[ ]:
'data_m/sentinel-2\\B02.tif'
In [ ]:
# Read and plot high-resolution raster
with rasterio.open(b8) as src:
highres = src.read(1)
plt.imshow(highres, cmap='turbo')
plt.show()
In [ ]:
def read_bands_sentinel2(directory):
bands = []
band_files = ['B02.tif', 'B03.tif', 'B04.tif', 'B05.tif', 'B06.tif', 'B07.tif', 'B08.tif', 'B8A.tif']
for band_file in band_files:
file_path = f"{directory}/{band_file}"
with rasterio.open(file_path) as dataset:
bands.append(dataset.read(1))
return bands
# Sentinel-2 bands
sentinel2_bands = read_bands_sentinel2("data_m/sentinel-2")
In [ ]:
In [ ]:
import numpy as np
# Visualization function for Sentinel-2
def visualize_sentinel2(sentinel2_bands, upper_percentile):
# Standard RGB composite for Sentinel-2: B04 (Red), B03 (Green), B02 (Blue)
rgb = np.stack([sentinel2_bands[2], sentinel2_bands[1], sentinel2_bands[0]], axis=-1)
max_val = np.percentile(rgb, upper_percentile * 100)
rgb_normalized = np.clip(rgb, 0, max_val) / max_val
plt.imshow(rgb_normalized)
plt.axis('off')
plt.show()
visualize_sentinel2(sentinel2_bands, 0.90)
In [ ]:
sentinel2_bands[0]
Out[ ]:
array([[1964, 1433, 1276, ..., 1324, 1295, 1288],
[1977, 1463, 1295, ..., 1287, 1272, 1284],
[2006, 1514, 1286, ..., 1277, 1276, 1262],
...,
[1188, 1196, 1201, ..., 1127, 1110, 1160],
[1191, 1202, 1196, ..., 1117, 1135, 1143],
[1219, 1227, 1207, ..., 1112, 1143, 1142]], dtype=uint16)
In [ ]:
def brightness(sentinel2):
# Placeholder coefficients for Sentinel-2 brightness index
b2, b3, b4, b8 = sentinel2[:4] # Using first four bands as an example
return b2 * 0.3 + b3 * 0.3 + b4 * 0.3 + b8 * 0.1
def greenness(sentinel2):
# Placeholder coefficients for Sentinel-2 greenness index
b2, b3, b4, b8 = sentinel2[:4] # Using first four bands as an example
return b3 * 0.5 - b2 * 0.2 - b4 * 0.2 - b8 * 0.1
def wetness(sentinel2):
# Placeholder coefficients for Sentinel-2 wetness index
b2, b3, b4, b8 = sentinel2[:4] # Using first four bands as an example
return b8 * 0.5 - b3 * 0.2 - b2 * 0.2 - b4 * 0.1
def mndwi(sentinel2):
green, nir = sentinel2[1], sentinel2[6] # B03 (Green) and B08 (NIR)
return (green - nir) / (green + nir + 1e-10)
def ndbi(sentinel2):
red, nir = sentinel2[2], sentinel2[6] # B04 (Red) and B08 (NIR)
return (nir - red) / (nir + red + 1e-10)
def ndvi(sentinel2):
nir, red = sentinel2[6], sentinel2[2] # B08 (NIR) and B04 (Red)
return (nir - red) / (nir + red + 1e-10)
def savi(sentinel2, L=0.5):
nir, red = sentinel2[6], sentinel2[2] # B08 (NIR) and B04 (Red)
return ((nir - red) / (nir + red + L + 1e-10)) * (1 + L)
In [ ]:
from rasterio.enums import Resampling
import rasterio
def resample_band(band, target_shape):
# Adjust band data to target shape
data = band.read(
out_shape=target_shape,
resampling=Resampling.bilinear
)
return data
# Define the path to your Sentinel-2 bands
folder = "data_m/sentinel-2"
tifs = sorted([os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(".tif")])
# Assuming you want to resample to the shape of the first band
with rasterio.open(tifs[0]) as first_band:
target_shape = first_band.shape
# Resample all bands
resampled_bands = []
for tif in tifs:
with rasterio.open(tif) as band:
resampled_data = resample_band(band, target_shape=target_shape)
resampled_bands.append(resampled_data)
In [ ]:
sentinel2_bands = resampled_bands
In [ ]:
# List of functions
functions = [mndwi, ndbi, ndvi, savi, brightness, greenness, wetness]
# Apply each function to the landsat data
# This will create a list of tuples with the function name and its output raster
smrs = [(func.__name__, func(sentinel2_bands)) for func in functions]
# If you need a dictionary instead of a list of tuples:
smrs_dict = {func.__name__: func(sentinel2_bands) for func in functions}
In [ ]:
import numpy as np
# Reshape the resampled bands
resampled_bands_2d = [np.squeeze(band) for band in resampled_bands]
# Check the shape again
print("Shape after squeezing:", resampled_bands_2d[0].shape)
# Try plotting again
plt.imshow(resampled_bands_2d[1], cmap='turbo') # Plotting the second band
plt.colorbar()
plt.title('Example Resampled Band')
plt.show()
Shape after squeezing: (2341, 2928)
In [ ]:
# Normalize the band data for visualization
normalized_band = (resampled_bands_2d[1] - resampled_bands_2d[1].min()) / (resampled_bands_2d[1].max() - resampled_bands_2d[1].min())
plt.imshow(normalized_band, cmap='turbo')
plt.colorbar()
plt.title('Normalized Resampled Band')
plt.show()
In [ ]:
# Squeeze the 'greenness_raster' to ensure it is 2D
greenness_raster_2d = np.squeeze(greenness_raster)
# Now let's recalculate the percentiles on the squeezed array
vmin, vmax = np.percentile(greenness_raster_2d, [5, 95]) # Adjust the percentiles as needed
# Choose a different colormap
cmap = plt.cm.viridis
# Plot the greenness index with the adjusted color scale and colormap
plt.imshow(greenness_raster_2d, cmap=cmap, vmin=vmin, vmax=vmax)
plt.colorbar()
plt.title('Greenness Index')
plt.show()
In [ ]:
# Apply Laplacian of Gaussian (LoG) and plot the subsection of the filtered image
sigma = 3
out2 = gaussian_laplace(highres, sigma=sigma)
plt.imshow(out2, cmap='turbo') # Remove the slicing to see the full image
plt.colorbar()
plt.title('Laplacian of Gaussian Filtered Image')
plt.show()
In [ ]:
# Resample and create the combined raster stack
with rasterio.open(b8) as src: # Assuming b8 is still the file path to the B08 band
affine = src.transform
crs = src.crs
target_shape = src.shape
# Assuming `smrs` is a list of tuples with (name, raster) for Sentinel-2
crst = sentinel2_sr + [band for name, band in smrs]
In [ ]:
crst
Out[ ]:
[array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
...,
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34],
[-3.3999998e+34, -3.3999998e+34, -3.3999998e+34, ...,
-3.3999998e+34, -3.3999998e+34, -3.3999998e+34]], dtype=float32),
array([[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
...,
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
array([[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
...,
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
array([[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
...,
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
array([[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
...,
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.],
[-0., -0., -0., ..., -0., -0., -0.]], dtype=float32),
array([[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
-3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
-3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
-3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
...,
[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
-3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
-3.4000002e+38, -3.4000002e+38, -3.4000002e+38],
[-3.4000002e+38, -3.4000002e+38, -3.4000002e+38, ...,
-3.4000002e+38, -3.4000002e+38, -3.4000002e+38]], dtype=float32),
array([[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
...,
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30]], dtype=float32),
array([[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
...,
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30],
[2.5353012e+30, 2.5353012e+30, 2.5353012e+30, ..., 2.5353012e+30,
2.5353012e+30, 2.5353012e+30]], dtype=float32)]
In [ ]:
# Assuming landsat is a list of numpy arrays for each band
landsat_sr = [dn_to_reflectance(band) for band in landsat]
# Now you can replace infinite values with 0.0 for each band
landsat_sr = [np.where(np.isinf(band), 0.0, band) for band in landsat_sr]
In [ ]:
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling
# Define the function to convert digital numbers to reflectance for Sentinel-2
def sentinel_dn_to_reflectance(array, band_number, scale_factor, add_constant):
# The scale factor and add_constant values should be retrieved from the metadata
# For the sake of this example, we're using placeholders
# Sentinel-2's scale factor is generally 0.0001 for reflectance bands
return array * scale_factor + add_constant
# Read and process Sentinel-2 bands
sentinel2_bands = read_bands_sentinel2("data_m/sentinel-2")
# Convert Sentinel-2 bands to reflectance
# You will need to look up the correct scale factors and add_constants for each band
scale_factors = [0.0001] * len(sentinel2_bands) # Replace with correct values from metadata
add_constants = [0.0] * len(sentinel2_bands) # Replace with correct values from metadata
sentinel2_sr = [sentinel_dn_to_reflectance(band, i, scale_factors[i], add_constants[i])
for i, band in enumerate(sentinel2_bands)]
# Replace infinite values with 0.0 for each band
sentinel2_sr = [np.where(np.isinf(band), 0.0, band) for band in sentinel2_sr]
# Assuming `smrs` is a list of tuples with (name, raster) for Sentinel-2
# Replace 'landsat_sr' with 'sentinel2_sr' to create the combined raster stack
crst = sentinel2_sr + [band for name, band in smrs]
# Assuming b8 is the file path to the B08 band which is typically used for high-resolution analysis
with rasterio.open(b8) as src:
affine = src.transform
crs = src.crs
target_transform = src.transform # Assuming b8 has the transform we want to match
target_shape = src.shape
In [ ]:
crst
Out[ ]:
[array([[0.1964, 0.1433, 0.1276, ..., 0.1324, 0.1295, 0.1288],
[0.1977, 0.1463, 0.1295, ..., 0.1287, 0.1272, 0.1284],
[0.2006, 0.1514, 0.1286, ..., 0.1277, 0.1276, 0.1262],
...,
[0.1188, 0.1196, 0.1201, ..., 0.1127, 0.111 , 0.116 ],
[0.1191, 0.1202, 0.1196, ..., 0.1117, 0.1135, 0.1143],
[0.1219, 0.1227, 0.1207, ..., 0.1112, 0.1143, 0.1142]]),
array([[0.231 , 0.1711, 0.1446, ..., 0.1696, 0.1694, 0.1684],
[0.2408, 0.17 , 0.1481, ..., 0.1678, 0.1662, 0.1658],
[0.2406, 0.1766, 0.1497, ..., 0.1656, 0.1656, 0.1648],
...,
[0.1345, 0.1404, 0.1365, ..., 0.1316, 0.1281, 0.1277],
[0.1332, 0.1344, 0.1357, ..., 0.1276, 0.129 , 0.1302],
[0.1373, 0.1383, 0.1414, ..., 0.1284, 0.1294, 0.13 ]]),
array([[0.2626, 0.1728, 0.1302, ..., 0.1297, 0.1294, 0.1273],
[0.2728, 0.1682, 0.1313, ..., 0.1275, 0.1277, 0.1274],
[0.2732, 0.18 , 0.1352, ..., 0.126 , 0.1252, 0.1268],
...,
[0.1183, 0.1177, 0.1179, ..., 0.1149, 0.1154, 0.1171],
[0.1183, 0.1186, 0.1186, ..., 0.1151, 0.1187, 0.1162],
[0.1207, 0.1213, 0.1218, ..., 0.1149, 0.1178, 0.1164]]),
array([[0.2901, 0.1922, 0.1587, ..., 0.2276, 0.2253, 0.2242],
[0.3016, 0.1862, 0.1635, ..., 0.2096, 0.2118, 0.209 ],
[0.2931, 0.203 , 0.1651, ..., 0.208 , 0.209 , 0.2095],
...,
[0.1652, 0.1676, 0.1629, ..., 0.16 , 0.1487, 0.1434],
[0.1609, 0.1592, 0.1582, ..., 0.1649, 0.1671, 0.1585],
[0.1626, 0.1635, 0.1672, ..., 0.1555, 0.1558, 0.1543]]),
array([[0.3325, 0.3737, 0.3844, ..., 0.4647, 0.4765, 0.4754],
[0.3344, 0.3735, 0.3951, ..., 0.5097, 0.5301, 0.5272],
[0.3275, 0.3748, 0.3989, ..., 0.5057, 0.5215, 0.5254],
...,
[0.3439, 0.3449, 0.3459, ..., 0.338 , 0.2901, 0.2536],
[0.3425, 0.3312, 0.315 , ..., 0.3686, 0.3328, 0.3005],
[0.3411, 0.3417, 0.3418, ..., 0.3081, 0.3013, 0.2982]]),
array([[0.3697, 0.4739, 0.5222, ..., 0.5294, 0.556 , 0.5763],
[0.359 , 0.4785, 0.5366, ..., 0.6067, 0.6305, 0.6301],
[0.3456, 0.4775, 0.5266, ..., 0.5901, 0.6252, 0.6387],
...,
[0.403 , 0.4104, 0.3919, ..., 0.4121, 0.3258, 0.2892],
[0.3872, 0.3911, 0.3851, ..., 0.4185, 0.4135, 0.3657],
[0.3914, 0.403 , 0.4041, ..., 0.3544, 0.3452, 0.3397]]),
array([[0.374 , 0.4548, 0.5104, ..., 0.618 , 0.6104, 0.6008],
[0.3688, 0.4544, 0.5176, ..., 0.6172, 0.6168, 0.6148],
[0.3648, 0.456 , 0.5212, ..., 0.6244, 0.6268, 0.6308],
...,
[0.3894, 0.4204, 0.3976, ..., 0.4344, 0.357 , 0.3104],
[0.3954, 0.4028, 0.4096, ..., 0.347 , 0.3676, 0.32 ],
[0.4068, 0.4082, 0.4264, ..., 0.3162, 0.3584, 0.331 ]]),
array([[0.3828, 0.4992, 0.542 , ..., 0.5731, 0.5838, 0.5867],
[0.3881, 0.4936, 0.5592, ..., 0.6404, 0.6662, 0.6715],
[0.3758, 0.4844, 0.5518, ..., 0.6358, 0.6524, 0.6625],
...,
[0.4368, 0.4194, 0.43 , ..., 0.4393, 0.3731, 0.3202],
[0.4172, 0.4136, 0.3967, ..., 0.4508, 0.415 , 0.3864],
[0.4243, 0.4361, 0.4307, ..., 0.3875, 0.3743, 0.3661]]),
array([[[10.59603306, 10.01741492, 9.4470229 , ..., 7.75165058,
7.83867658, 7.95787832],
[10.54068241, 10.04035874, 9.28961995, ..., 7.77605096,
7.79438059, 7.82039457],
[10.62008589, 9.91811571, 9.21463705, ..., 7.71493671,
7.68854114, 7.65158371],
...,
[12.02271426, 11.18687589, 11.78150159, ..., 11.04381625,
13.03793032, 14.54211367],
[11.9020053 , 11.69992554, 11.51604621, ..., 13.34639697,
12.71647201, 14.13549534],
[11.54953134, 11.49807868, 11.04015498, ..., 14.31803869,
12.96555966, 13.78004338]]]),
array([[[0.17499215, 0.44933078, 0.59350609, ..., 0.65306941,
0.65017572, 0.65032276],
[0.14962594, 0.45968519, 0.59531515, ..., 0.65758023,
0.65695097, 0.65669631],
[0.14357367, 0.43396226, 0.58805606, ..., 0.6641791 ,
0.66702128, 0.66525871],
...,
[0.53397676, 0.56253484, 0.54258002, ..., 0.58164937,
0.51143099, 0.45216374],
[0.53941989, 0.54507096, 0.55092768, ..., 0.50183943,
0.51182398, 0.46721687],
[0.54236967, 0.54183192, 0.55563663, ..., 0.46694502,
0.5052499 , 0.47966026]]]),
array([[[0.17499215, 0.44933078, 0.59350609, ..., 0.65306941,
0.65017572, 0.65032276],
[0.14962594, 0.45968519, 0.59531515, ..., 0.65758023,
0.65695097, 0.65669631],
[0.14357367, 0.43396226, 0.58805606, ..., 0.6641791 ,
0.66702128, 0.66525871],
...,
[0.53397676, 0.56253484, 0.54258002, ..., 0.58164937,
0.51143099, 0.45216374],
[0.53941989, 0.54507096, 0.55092768, ..., 0.50183943,
0.51182398, 0.46721687],
[0.54236967, 0.54183192, 0.55563663, ..., 0.46694502,
0.5052499 , 0.47966026]]]),
array([[[0.2624676 , 0.67394248, 0.89018965, ..., 0.97953862,
0.97519768, 0.97541715],
[0.22442141, 0.68947242, 0.89290392, ..., 0.98630413,
0.98536028, 0.98497811],
[0.21534363, 0.65089223, 0.88201691, ..., 0.99620228,
1.00046539, 0.99782221],
...,
[0.80088626, 0.84372387, 0.8137911 , ..., 0.87239465,
0.7670653 , 0.6781663 ],
[0.80905109, 0.81752805, 0.8263133 , ..., 0.7526777 ,
0.76765704, 0.70074499],
[0.8134774 , 0.81267114, 0.83337893, ..., 0.70033631,
0.75779528, 0.71940999]]]),
array([[[2360.1, 1727.2, 1423.9, ..., 1520.1, 1509.4, 1497.7],
[2426.9, 1720.9, 1443. , ..., 1493.5, 1484.1, 1485.2],
[2441.9, 1795. , 1456. , ..., 1472.5, 1468.6, 1466.2],
...,
[1276.1, 1294.2, 1284. , ..., 1240.2, 1222.7, 1239.9],
[1274. , 1281.8, 1284.1, ..., 1221. , 1239.8, 1237.5],
[1302.3, 1309.7, 1315. , ..., 1218.9, 1239.2, 1236.1]]]),
array([[[-53.1, -42.3, -9.3, ..., 98.8, 104.7, 105.6],
[-30. , -46.4, 2.6, ..., 105.1, 100.4, 97. ],
[-43.3, -50.8, 5.4, ..., 106. , 109. , 105.2],
...,
[ 37. , 66.3, 46. , ..., 40.2, 28.5, 14.8],
[ 29. , 32.2, 39.7, ..., 26.6, 24.4, 34.6],
[ 38.7, 40.7, 58.7, ..., 34.4, 28.1, 34.5]]]),
array([[[333.1, 526.4, 408.9, ..., 391.3, 395.3, 399.3],
[315.2, 536.2, 395. , ..., 387. , 389.5, 386.2],
[337.9, 519. , 385.7, ..., 360.4, 355.4, 355.2],
...,
[181.6, 167.8, 171.4, ..., 209.5, 202.4, 183. ],
[188.1, 183.2, 182.8, ..., 195.3, 177.3, 171.8],
[173.9, 170.7, 170.5, ..., 182.9, 168.3, 166.7]]])]
In [ ]:
# Function to resample an array to match the target raster
def resample_to_raster(src_array, src_transform, src_crs, target_raster_path):
with rasterio.open(target_raster_path) as target_raster:
target_shape = target_raster.shape
target_transform = target_raster.transform
target_crs = target_raster.crs
resampled_array = np.empty(target_shape, dtype=src_array.dtype)
reproject(
source=src_array,
destination=resampled_array,
src_transform=src_transform,
src_crs=src_crs,
dst_transform=target_transform,
dst_crs=target_crs,
resampling=Resampling.bilinear
)
return resampled_array
# Assuming crst is a list of numpy arrays representing the combined raster stack
# Define paths to training and validation set rasters
train_raster_path = os.path.join("data_m/munich_training.tif")
val_raster_path = os.path.join("data_m/munich_validation.tif")
In [ ]:
# Open the first raster in sentinel2_bands to get crs and transform
with rasterio.open(b8) as src: # Assuming you want to use B02 for crs and transform
src_transform = src.transform
src_crs = src.crs
# Resample your stack to the training and validation datasets
trainrs = [resample_to_raster(raster, src_transform, src_crs, train_raster_path) for raster in crst]
valrs = [resample_to_raster(raster, src_transform, src_crs, val_raster_path) for raster in crst]
In [ ]:
valrs
Out[ ]:
[array([[0.15356918, 0.18619078, 0.21632533, ..., 0. , 0. ,
0. ],
[0.15996372, 0.18228472, 0.20241405, ..., 0. , 0. ,
0. ],
[0.17756013, 0.18499236, 0.17683913, ..., 0. , 0. ,
0. ],
...,
[0.16763321, 0.19904838, 0.19861306, ..., 0. , 0. ,
0. ],
[0.15188736, 0.16774144, 0.16060217, ..., 0. , 0. ,
0. ],
[0.12164528, 0.12859054, 0.12780843, ..., 0. , 0. ,
0. ]]),
array([[0.17233257, 0.20223479, 0.23958778, ..., 0. , 0. ,
0. ],
[0.18619048, 0.20579693, 0.22255609, ..., 0. , 0. ,
0. ],
[0.20616019, 0.21201495, 0.20929842, ..., 0. , 0. ,
0. ],
...,
[0.18307601, 0.19629549, 0.20731164, ..., 0. , 0. ,
0. ],
[0.16875808, 0.17551845, 0.17535673, ..., 0. , 0. ,
0. ],
[0.14366143, 0.15133531, 0.15135189, ..., 0. , 0. ,
0. ]]),
array([[0.17475415, 0.21418065, 0.24274322, ..., 0. , 0. ,
0. ],
[0.18589872, 0.21669778, 0.23965327, ..., 0. , 0. ,
0. ],
[0.2142721 , 0.22097837, 0.20410899, ..., 0. , 0. ,
0. ],
...,
[0.17100312, 0.19422466, 0.19845509, ..., 0. , 0. ,
0. ],
[0.15645082, 0.16641619, 0.16262694, ..., 0. , 0. ,
0. ],
[0.12881286, 0.13739064, 0.1358647 , ..., 0. , 0. ,
0. ]]),
array([[0.3674605 , 0.3435625 , 0.26316939, ..., 0. , 0. ,
0. ],
[0.28069858, 0.26028224, 0.22475733, ..., 0. , 0. ,
0. ],
[0.22989208, 0.22338268, 0.22031715, ..., 0. , 0. ,
0. ],
...,
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ]]),
array([[0.39066329, 0.37364253, 0.35338183, ..., 0. , 0. ,
0. ],
[0.36439798, 0.34903737, 0.34226339, ..., 0. , 0. ,
0. ],
[0.35202646, 0.3522999 , 0.3587746 , ..., 0. , 0. ,
0. ],
...,
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ]]),
array([[0.40650246, 0.39234175, 0.38610653, ..., 0. , 0. ,
0. ],
[0.39558502, 0.38585678, 0.38821847, ..., 0. , 0. ,
0. ],
[0.3875339 , 0.38248344, 0.39315632, ..., 0. , 0. ,
0. ],
...,
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ]]),
array([[0.30985396, 0.30146089, 0.343658 , ..., 0. , 0. ,
0. ],
[0.37149817, 0.34317773, 0.34984341, ..., 0. , 0. ,
0. ],
[0.39669522, 0.39020706, 0.39714359, ..., 0. , 0. ,
0. ],
...,
[0.31319273, 0.32184773, 0.34604266, ..., 0. , 0. ,
0. ],
[0.31544681, 0.32553554, 0.32265193, ..., 0. , 0. ,
0. ],
[0.34762168, 0.37586748, 0.36707344, ..., 0. , 0. ,
0. ]]),
array([[0.41323875, 0.40598401, 0.40695634, ..., 0. , 0. ,
0. ],
[0.40930351, 0.40053686, 0.41095442, ..., 0. , 0. ,
0. ],
[0.41364292, 0.41470789, 0.41992427, ..., 0. , 0. ,
0. ],
...,
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
0. ]]),
array([[13.401633 , 12.92216089, 11.14805342, ..., 0. ,
0. , 0. ],
[11.48559297, 11.7594568 , 11.25912044, ..., 0. ,
0. , 0. ],
[10.55857546, 10.59144262, 10.49773871, ..., 0. ,
0. , 0. ],
...,
[13.15631855, 12.47885467, 11.65207668, ..., 0. ,
0. , 0. ],
[13.2758039 , 12.80507447, 12.92077035, ..., 0. ,
0. , 0. ],
[12.95558034, 12.04102709, 12.25940703, ..., 0. ,
0. , 0. ]]),
array([[0.27884161, 0.16849986, 0.17116097, ..., 0. , 0. ,
0. ],
[0.33039098, 0.22417862, 0.18560495, ..., 0. , 0. ,
0. ],
[0.29971925, 0.27785492, 0.32241209, ..., 0. , 0. ,
0. ],
...,
[0.2952175 , 0.24948777, 0.26908283, ..., 0. , 0. ,
0. ],
[0.33872611, 0.32644652, 0.33340774, ..., 0. , 0. ,
0. ],
[0.45839707, 0.4631294 , 0.45794567, ..., 0. , 0. ,
0. ]]),
array([[0.27884161, 0.16849986, 0.17116097, ..., 0. , 0. ,
0. ],
[0.33039098, 0.22417862, 0.18560495, ..., 0. , 0. ,
0. ],
[0.29971925, 0.27785492, 0.32241209, ..., 0. , 0. ,
0. ],
...,
[0.2952175 , 0.24948777, 0.26908283, ..., 0. , 0. ,
0. ],
[0.33872611, 0.32644652, 0.33340774, ..., 0. , 0. ,
0. ],
[0.45839707, 0.4631294 , 0.45794567, ..., 0. , 0. ,
0. ]]),
array([[0.41821884, 0.25272525, 0.25671959, ..., 0. , 0. ,
0. ],
[0.49554219, 0.33623796, 0.27838393, ..., 0. , 0. ,
0. ],
[0.44954192, 0.41674814, 0.48357774, ..., 0. , 0. ,
0. ],
...,
[0.4427795 , 0.37419478, 0.4035873 , ..., 0. , 0. ,
0. ],
[0.50803488, 0.48961945, 0.50005929, ..., 0. , 0. ,
0. ],
[0.68752342, 0.69462648, 0.68685031, ..., 0. , 0. ,
0. ]]),
array([[1714.15914624, 2020.78022215, 2322.0051347 , ..., 0. ,
0. , 0. ],
[1814.46177141, 2033.68098747, 2223.2570349 , ..., 0. ,
0. , 0. ],
[2033.58796385, 2094.12347237, 2014.33166583, ..., 0. ,
0. , 0. ],
...,
[1753.58904921, 1960.79321291, 2008.6562132 , ..., 0. ,
0. , 0. ],
[1612.91162565, 1717.58213304, 1688.93630121, ..., 0. ,
0. , 0. ],
[1357.50924432, 1434.08049557, 1431.57090066, ..., 0. ,
0. , 0. ]]),
array([[-7.17523299, -2.53051898, 53.76568205, ..., 0. ,
0. , 0. ],
[20.9245266 , 11.6769757 , -0.7409797 , ..., 0. ,
0. , 0. ],
[ 7.52579398, 7.96689003, 41.00378592, ..., 0. ,
0. , 0. ],
...,
[49.65539476, 2.84371525, 46.90509594, ..., 0. ,
0. , 0. ],
[45.49117435, 20.72311273, 37.14663244, ..., 0. ,
0. , 0. ],
[42.24030341, 42.58320086, 42.91734008, ..., 0. ,
0. , 0. ]]),
array([[234.39957515, 73.77623146, -24.38871135, ..., 0. ,
0. , 0. ],
[213.30787057, 103.8524491 , 57.34040169, ..., 0. ,
0. , 0. ],
[216.34076353, 185.83914267, 241.57628802, ..., 0. ,
0. , 0. ],
...,
[ 69.83848219, -24.47420188, -32.72040495, ..., 0. ,
0. , 0. ],
[110.37261708, 89.83350752, 131.3491711 , ..., 0. ,
0. , 0. ],
[216.32641271, 213.41261753, 238.29387495, ..., 0. ,
0. , 0. ]])]
In [ ]:
# Write out the resampled data
with rasterio.open(train_raster_path) as src:
profile = src.profile
profile.update(dtype=rasterio.float32, count=len(trainrs))
with rasterio.open('trainbands_sentinel_munich.tif', 'w', **profile) as dst:
for i, array in enumerate(trainrs, start=1):
dst.write(array.astype(rasterio.float32), i)
with rasterio.open(val_raster_path) as src:
profile = src.profile
profile.update(dtype=rasterio.float32, count=len(valrs))
with rasterio.open('valbands_sentinel_munich.tif', 'w', **profile) as dst:
for i, array in enumerate(valrs, start=1):
dst.write(array.astype(rasterio.float32), i)
In [ ]:
def rasters_to_dataframe(raster_paths, class_raster_path):
# Read class raster
with rasterio.open(class_raster_path) as class_raster:
class_data = class_raster.read(1).flatten()
data = {'class': class_data}
for i, raster_path in enumerate(raster_paths):
with rasterio.open(raster_path) as raster:
raster_data = raster.read(1).flatten()
band_name = f'band_{i+1}'
data[band_name] = raster_data
df = pd.DataFrame(data)
return df
# Define paths to training and validation set rasters
train_raster_path = os.path.join("data_m/munich_training.tif")
val_raster_path = os.path.join("data_m/munich_validation.tif")
In [ ]:
# Open the first raster in crst to get crs and transform
with rasterio.open(b8) as src: # Assuming b8 is a file path to a raster in crst
src_transform = src.transform
src_crs = src.crs
# Resample our stack to the training and validation datasets
trainrs = [resample_to_raster(raster, src_transform, src_crs, train_raster_path) for raster in crst]
valrs = [resample_to_raster(raster, src_transform, src_crs, val_raster_path) for raster in crst]
In [ ]:
# Write out the resampled data
with rasterio.open(train_raster_path) as src:
profile = src.profile
profile.update(dtype=rasterio.float32, count=len(trainrs))
with rasterio.open('trainbands_sentinel.tif', 'w', **profile) as dst:
for i, array in enumerate(trainrs, start=1):
dst.write(array.astype(rasterio.float32), i)
with rasterio.open(val_raster_path) as src:
profile = src.profile
profile.update(dtype=rasterio.float32, count=len(valrs))
with rasterio.open('valbands_sentinel.tif', 'w', **profile) as dst:
for i, array in enumerate(valrs, start=1):
dst.write(array.astype(rasterio.float32), i)
In [ ]:
# Transform the RasterStack into a DataFrame
# Here we need to flatten the raster arrays and combine them into a dataframe
# This can be memory intensive and might need optimization for large rasters
# Helper function to flatten and combine rasters into a dataframe
def rasters_to_dataframe(rasters, class_raster_path):
# Read class raster
with rasterio.open(class_raster_path) as class_raster:
class_data = class_raster.read(1).flatten()
data = {'class': class_data}
for i, raster in enumerate(rasters):
data[f'band_{i+1}'] = raster.flatten()
df = pd.DataFrame(data)
return df
# Create dataframes for training and validation
train_df = rasters_to_dataframe(trainrs, train_raster_path)
val_df = rasters_to_dataframe(valrs, val_raster_path)
In [ ]:
train_df
Out[ ]:
| class | band_1 | band_2 | band_3 | band_4 | band_5 | band_6 | band_7 | band_8 | band_9 | band_10 | band_11 | band_12 | band_13 | band_14 | band_15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 1 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 2 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 3 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 4 | 0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7620391 | 0 | 0.122571 | 0.146188 | 0.128295 | 0.0 | 0.0 | 0.0 | 0.514778 | 0.0 | 9.357737 | 0.600985 | 0.600985 | 0.901408 | 1353.875209 | 66.497524 | 147.741836 |
| 7620392 | 0 | 0.123020 | 0.145848 | 0.127469 | 0.0 | 0.0 | 0.0 | 0.516879 | 0.0 | 9.329054 | 0.604347 | 0.604347 | 0.906450 | 1352.562503 | 64.711552 | 152.540054 |
| 7620393 | 0 | 0.123262 | 0.144189 | 0.127049 | 0.0 | 0.0 | 0.0 | 0.515817 | 0.0 | 9.366624 | 0.604743 | 0.604743 | 0.907044 | 1347.913460 | 55.913481 | 160.105623 |
| 7620394 | 0 | 0.121898 | 0.144145 | 0.126898 | 0.0 | 0.0 | 0.0 | 0.514479 | 0.0 | 9.388263 | 0.604293 | 0.604293 | 0.906369 | 1343.701400 | 58.257346 | 165.394981 |
| 7620395 | 0 | 0.120907 | 0.144483 | 0.126878 | 0.0 | 0.0 | 0.0 | 0.512640 | 0.0 | 9.413135 | 0.603202 | 0.603202 | 0.904732 | 1341.801145 | 61.849239 | 167.330073 |
7620396 rows × 16 columns
In [ ]:
# Combine training and validation dataframes
combined_df = pd.concat([train_df, val_df], ignore_index=True)
# Drop rows where class is 0 and rows with infinite values
#combined_df = combined_df[(combined_df['class'] != 0) & ~np.isinf(combined_df).any(axis=1)]
# Drop 'X' and 'Y' if they exist in the dataframe
combined_df = combined_df.drop(columns=['X', 'Y'], errors='ignore')
combined_df
Out[ ]:
| class | band_1 | band_2 | band_3 | band_4 | band_5 | band_6 | band_7 | band_8 | band_9 | band_10 | band_11 | band_12 | band_13 | band_14 | band_15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10820336 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10820337 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10820338 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10820339 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10820340 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
10820341 rows × 16 columns
In [ ]:
from sklearn.preprocessing import LabelEncoder
# Assuming 'combined_df' is your DataFrame and it includes a 'class' column
# Plot a histogram of the 'class' column
plt.hist(combined_df['class'], bins='auto') # 'auto' lets matplotlib decide the number of bins
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Frequency')
plt.show()
# Convert the 'class' column to a numerical format if it's categorical
if combined_df['class'].dtype == 'object':
le = LabelEncoder()
combined_df['class'] = le.fit_transform(combined_df['class'])
# Describe the DataFrame to check for Inf, NaNs, or other odd patterns
description = combined_df.describe(include='all') # include='all' to get statistics for all columns
print(description)
# Print out the schema of the DataFra
combined_df.dtypes
class band_1 band_2 band_3 band_4 \
count 1.082034e+07 1.082034e+07 1.082034e+07 1.082034e+07 1.082034e+07
mean 6.296555e-01 1.374456e-01 1.575983e-01 1.541511e-01 4.622750e-02
std 7.745073e-01 6.067584e-02 6.745948e-02 7.305796e-02 9.070470e-02
min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
25% 0.000000e+00 1.223063e-01 1.418066e-01 1.253014e-01 0.000000e+00
50% 0.000000e+00 1.400209e-01 1.650636e-01 1.542141e-01 0.000000e+00
75% 1.000000e+00 1.663457e-01 1.896646e-01 1.937141e-01 0.000000e+00
max 2.000000e+00 1.956800e+00 1.843200e+00 1.771200e+00 1.736900e+00
band_5 band_6 band_7 band_8 band_9 \
count 1.082034e+07 1.082034e+07 1.082034e+07 1.082034e+07 1.082034e+07
mean 7.080163e-02 7.920400e-02 3.213029e-01 8.398045e-02 1.068471e+01
std 1.378557e-01 1.550626e-01 1.361907e-01 1.644535e-01 4.308195e+00
min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
25% 0.000000e+00 0.000000e+00 2.858879e-01 0.000000e+00 1.020321e+01
50% 0.000000e+00 0.000000e+00 3.423224e-01 0.000000e+00 1.169260e+01
75% 0.000000e+00 0.000000e+00 4.020957e-01 0.000000e+00 1.305446e+01
max 1.709000e+00 1.694475e+00 1.688000e+00 1.676200e+00 2.822862e+01
band_10 band_11 band_12 band_13 band_14 \
count 1.082034e+07 1.082034e+07 1.082034e+07 1.082034e+07 1.082034e+07
mean 4.384063e-01 4.384063e-01 6.575357e-01 1.539329e+03 1.305443e+01
std 1.457378e+00 1.457378e+00 2.185665e+00 6.751483e+02 4.525055e+01
min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 -2.256087e+03
25% 1.493996e-01 1.493996e-01 2.240770e-01 1.346875e+03 -5.723911e+00
50% 3.335757e-01 3.335757e-01 5.003158e-01 1.589444e+03 1.420160e+01
75% 4.873635e-01 4.873635e-01 7.309766e-01 1.879604e+03 3.851817e+01
max 3.077051e+01 3.077051e+01 4.614493e+01 1.839159e+04 3.180503e+03
band_15
count 1.082034e+07
mean 2.144795e+02
std 1.322087e+02
min -5.754183e+03
25% 1.490908e+02
50% 2.239966e+02
75% 2.964437e+02
max 7.018870e+03
Out[ ]:
class uint8 band_1 float64 band_2 float64 band_3 float64 band_4 float64 band_5 float64 band_6 float64 band_7 float64 band_8 float64 band_9 float64 band_10 float64 band_11 float64 band_12 float64 band_13 float64 band_14 float64 band_15 float64 dtype: object
In [ ]:
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, make_scorer
from xgboost import XGBClassifier
import numpy as np
# Assuming 'combined_df' is the DataFrame with your data and 'class' is the target column
y = combined_df['class'].values
#y = combined_df['class'].values - 1 # Subtract 1 to make classes start from 0
X = combined_df.drop(columns=['class']).values
# Initialize the XGBoost classifier
tree = XGBClassifier(random_state=123) # Tree is a reserved keyword in Python, so we use 'tree' variable
# Define the cross-validation strategy
cv_strategy = StratifiedKFold(n_splits=6, shuffle=True, random_state=123)
# Define scoring metrics
scoring_metrics = {
'balanced_accuracy': make_scorer(balanced_accuracy_score),
'kappa': make_scorer(cohen_kappa_score)
}
# Evaluate the model using cross-validation
pe = cross_validate(tree, X, y, cv=cv_strategy, scoring=scoring_metrics, verbose=1)
# Print the measurements
print(f"Balanced Accuracy: {np.mean(pe['test_balanced_accuracy'])}")
print(f"Cohen's Kappa: {np.mean(pe['test_kappa'])}")
Balanced Accuracy: 0.615085366767569 Cohen's Kappa: 0.4607336151786576
In [ ]:
# Check the shapes of all rasters in crst
raster_shapes = [raster.shape for raster in crst]
if not all(shape == raster_shapes[0] for shape in raster_shapes):
print("Not all rasters have the same shape:", raster_shapes)
else:
print("All rasters have the same shape:", raster_shapes[0])
# Assuming all rasters are now confirmed to be of the same shape
dfn = pd.DataFrame({f'band_{i}': raster.flatten() for i, raster in enumerate(crst)})
Not all rasters have the same shape: [(2341, 2928), (2341, 2928), (2341, 2928), (1171, 1464), (1171, 1464), (1171, 1464), (2341, 2928), (1171, 1464), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928), (1, 2341, 2928)]
In [ ]:
from rasterio.warp import reproject, calculate_default_transform
def resize_raster_array(array, src_transform, src_crs, target_shape, target_bounds):
# Calculate the new transform
new_transform, width, height = calculate_default_transform(
src_crs, src_crs, target_shape[1], target_shape[0], *target_bounds)
# Set up an empty array for the resampled data
resampled_array = np.empty((height, width), dtype=array.dtype)
# Perform the resampling
reproject(
source=array,
destination=resampled_array,
src_transform=src_transform,
src_crs=src_crs,
dst_transform=new_transform,
dst_crs=src_crs,
resampling=Resampling.nearest
)
return resampled_array
# Assuming 'crst' is your list of rasters (numpy arrays)
# and 'b8' is the file path of a raster with the correct size
with rasterio.open(b8) as src:
target_shape = src.shape
src_transform = src.transform
src_crs = src.crs
target_bounds = src.bounds
# Resize rasters in 'crst' if needed
resized_crst = [resize_raster_array(raster, src_transform, src_crs, target_shape, target_bounds) if raster.shape != target_shape else raster for raster in crst]
# Convert resized_crst to a DataFrame
dfn = pd.DataFrame({f'band_{i}': raster.flatten() for i, raster in enumerate(resized_crst)})
In [ ]:
pred_reshaped
Out[ ]:
array([[2, 2, 1, ..., 0, 0, 0],
[2, 2, 2, ..., 0, 0, 0],
[2, 2, 2, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]])
In [ ]:
# Assuming 'crst' is a list of rasters (numpy arrays)
# Convert the list of rasters to a DataFrame
#dfn = pd.DataFrame({f'band_{i}': raster.flatten() for i, raster in enumerate(crst)})
# Replace any non-finite values (like Inf) with 0
dfn = dfn.replace([np.inf, -np.inf], 0)
# Train the model
tree.fit(X, y) # X and y should be your training data and labels
# Now you can make predictions
pred = tree.predict(dfn)
nodata_value = -9999 # You can choose any valid int32 value as the nodata value
# Assuming 'srs' is one of the original rasters and has the same spatial dimensions as 'crst'
with rasterio.open(b8) as src: # Assuming b8 is a path to one of your rasters
profile = src.profile
profile.update(dtype=rasterio.int32, count=1, nodata=nodata_value)
# Reshape the prediction array to match the spatial dimensions and write to a new raster file
pred_reshaped = pred.reshape(src.shape).astype(np.int32)
with rasterio.open('prediction.tif', 'w', **profile) as dst:
dst.write(pred_reshaped, 1)
plt.imshow(pred_reshaped, cmap='viridis') # Change colormap as needed
plt.colorbar()
plt.title('Prediction Raster')
plt.show()
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Create a new figure and axis object
fig, ax = plt.subplots()
# Define the colors for each class
class_colors = ['yellow', 'green', 'red'] # Class 0 = yellow, 1 = green, 2 = red
# Create a ListedColormap object based on the class colors
cmap = ListedColormap(class_colors)
# Assuming 'pred_reshaped' is your 2D array of predictions
# Replace any non-finite values (like Inf) with the nodata value
pred_reshaped = np.where(np.isfinite(pred_reshaped), pred_reshaped, nodata_value)
# Mask the nodata values so they are not colored in the plot
masked_predictions = np.ma.masked_where(pred_reshaped == nodata_value, pred_reshaped)
# Plot the masked predictions array with the custom colormap
cbar = ax.imshow(masked_predictions, cmap=cmap)
# Optionally, create a colorbar with labels
colorbar_labels = ['Class 0', 'Class 1', 'Class 2']
cbar = fig.colorbar(cbar, ticks=[0, 1, 2])
cbar.ax.set_yticklabels(colorbar_labels)
# Set the title
ax.set_title('Prediction Raster')
# Display the plot
plt.show()
In [ ]:
import matplotlib.pyplot as plt
# Your visualization function remains unchanged
def visualize_sentinel2(sentinel2_bands, upper_percentile, ax):
# Standard RGB composite for Sentinel-2: B04 (Red), B03 (Green), B02 (Blue)
rgb = np.stack([sentinel2_bands[2], sentinel2_bands[1], sentinel2_bands[0]], axis=-1)
max_val = np.percentile(rgb, upper_percentile * 100)
rgb_normalized = np.clip(rgb, 0, max_val) / max_val
ax.imshow(rgb_normalized)
ax.axis('off') # Turn off axis
# Create a figure with two subplots, one above the other
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12)) # Adjust figsize as needed
# Plot the original Sentinel-2 image in the first subplot
visualize_sentinel2(sentinel2_bands, 0.90, ax1)
# Plot the prediction raster in the second subplot
# Assuming 'pred_reshaped' is your 2D numpy array with prediction classes
class_colors = ['yellow', 'green', 'red'] # Class 0 = yellow, 1 = green, 2 = red
cmap = ListedColormap(class_colors)
masked_predictions = np.ma.masked_where(pred_reshaped == nodata_value, pred_reshaped)
cbar = ax2.imshow(masked_predictions, cmap=cmap)
fig.colorbar(cbar, ax=ax2, ticks=[0, 1, 2], fraction=0.036, pad=0.04) # Adjust colorbar size as needed
ax2.set_title('Prediction Raster')
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
In [ ]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Define the bounds of the subset you want to extract
start_row, start_col = 1000, 1000 # for example
end_row, end_col = 1500, 1500 # for example
# Adjust the visualization function to plot a subset
def visualize_sentinel2_subset(sentinel2_bands, upper_percentile, ax, bounds):
# Standard RGB composite for Sentinel-2: B04 (Red), B03 (Green), B02 (Blue)
rgb = np.stack([sentinel2_bands[2], sentinel2_bands[1], sentinel2_bands[0]], axis=-1)
# Crop the RGB composite image
rgb_subset = rgb[bounds[0]:bounds[2], bounds[1]:bounds[3], :]
max_val = np.percentile(rgb_subset, upper_percentile * 100)
rgb_normalized = np.clip(rgb_subset, 0, max_val) / max_val
ax.imshow(rgb_normalized)
ax.axis('off') # Turn off axis
# Create a figure with two subplots, one above the other
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12)) # Adjust figsize as needed
# Plot the subset of the original Sentinel-2 image in the first subplot
visualize_sentinel2_subset(sentinel2_bands, 0.90, ax1, (start_row, start_col, end_row, end_col))
# Plot the subset of the prediction raster in the second subplot
class_colors = ['yellow', 'green', 'red'] # Class 0 = yellow, 1 = green, 2 = red
cmap = ListedColormap(class_colors)
# Crop the prediction raster to the same subset bounds
pred_subset = pred_reshaped[start_row:end_row, start_col:end_col]
masked_predictions = np.ma.masked_where(pred_subset == nodata_value, pred_subset)
cbar = ax2.imshow(masked_predictions, cmap=cmap)
fig.colorbar(cbar, ax=ax2, ticks=[0, 1, 2], fraction=0.036, pad=0.04) # Adjust colorbar size as needed
ax2.set_title('Prediction Raster Subset')
# Adjust layout to prevent overlap
plt.tight_layout()
# Show the plot
plt.show()
In [ ]: